package com.sxd.bill.ner.utils;

import java.util.Set;
import java.util.HashSet;

/**
 * Static helpers that tag characters with a coarse, single-label category.
 *
 * <p>The labels produced are: {@code "数"} (digit), {@code "英"} (ASCII letter),
 * {@code "中"} (character in the range U+4E00..U+9FA5), {@code "S"} (member of
 * {@link #seps}) and {@code "X"} (anything else).
 */
public class WordUtitly {
    /** Characters labelled {@code "S"}; populated by the static initializer below. */
    static Set<Character> seps = new HashSet<>();
    static {
        // ASCII space, full-width comma, ideographic comma and forward slash.
        for (char sep : new char[] {' ', '，', '、', '/'}) {
            seps.add(sep);
        }
    }

    /**
     * Classifies a single character.
     *
     * <p>The checks run in a fixed order and the first match wins: digit, ASCII
     * letter, U+4E00..U+9FA5 range, separator set, fallback.
     *
     * @param chr the character to classify
     * @return {@code "数"}, {@code "英"}, {@code "中"}, {@code "S"} or {@code "X"}
     */
    public static String getCharKind(char chr) {
        // Character.isDigit accepts every Unicode decimal digit, not only '0'..'9'.
        if (Character.isDigit(chr)) {
            return "数";
        }

        boolean upperAscii = 'A' <= chr && chr <= 'Z';
        boolean lowerAscii = 'a' <= chr && chr <= 'z';
        if (upperAscii || lowerAscii) {
            return "英";
        }

        // NOTE(review): the upper bound is U+9FA5, so code points U+9FA6..U+9FFF of the
        // same Unicode block fall through to "S"/"X" - confirm whether that is intended.
        if ('\u4e00' <= chr && chr <= '\u9fa5') {
            return "中";
        }

        return seps.contains(chr) ? "S" : "X";
    }

    /**
     * Classifies every UTF-16 code unit of {@code text} with {@link #getCharKind(char)}.
     *
     * @param text the text to classify; a {@code null} value results in a
     *     {@link NullPointerException}
     * @return an array of the same length as {@code text}, where element {@code i}
     *     is the label of {@code text.charAt(i)}
     */
    public static String[] getCharKind(String text) {
        final int length = text.length();
        String[] kinds = new String[length];
        int pos = 0;
        while (pos < length) {
            kinds[pos] = getCharKind(text.charAt(pos));
            pos++;
        }
        return kinds;
    }
}
